"""Build .docx from paper.md + all output tables."""

import re
from pathlib import Path
from docx import Document
from docx.shared import Inches, Pt, Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT

# Filesystem layout: every input and output lives under the project root.
PROJECT = Path("/mnt/c/demographics_capital_flows/trilemma")
# Directory holding the markdown result tables placed in the appendix.
TABLES_DIR = PROJECT.joinpath("output", "tables")
# Markdown manuscript that supplies the body of the document.
PAPER_MD = PROJECT.joinpath("paper", "paper.md")
# Destination .docx, written next to the manuscript.
OUTPUT = PAPER_MD.with_name("trilemma_paper.docx")


def set_cell_text(cell, text, bold=False, size=Pt(9), font_name='Times New Roman'):
    """Replace a table cell's content with a single centered, formatted run.

    Args:
        cell: Table cell object to overwrite.
        text: String placed in the cell.
        bold: Whether the run is bold.
        size: Font size applied to the run.
        font_name: Font family applied to the run.
    """
    # Clear whatever the cell held, then reuse its first paragraph.
    cell.text = ""
    paragraph = cell.paragraphs[0]
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    styled = paragraph.add_run(text)
    styled.bold = bold
    font = styled.font
    font.name = font_name
    font.size = size


def _split_md_row(line):
    """Split one markdown table line into stripped cell strings.

    Only the optional leading and trailing pipes are discarded, so an empty
    interior cell keeps its column position instead of collapsing the row.
    """
    body = line.strip()
    if body.startswith('|'):
        body = body[1:]
    if body.endswith('|'):
        body = body[:-1]
    return [cell.strip() for cell in body.split('|')]


def add_md_table(doc, md_text, title=None):
    """Append a pipe-delimited markdown table to *doc* as a Word table.

    Lines without a pipe and lines starting with '#' are ignored. Rows made
    only of '-', ':' and spaces (alignment rows, or rows with no content)
    are dropped. Nothing is added unless at least two rows remain. The first
    remaining row is rendered bold as the header; shorter rows leave their
    trailing cells empty. A blank paragraph is appended after the table.

    Args:
        doc: Document object the table is appended to.
        md_text: Markdown text containing the table.
        title: Optional caption written in bold above the table.
    """
    rows = []
    for raw in md_text.strip().split('\n'):
        line = raw.strip()
        if not line or '|' not in line or line.startswith('#'):
            continue
        cells = _split_md_row(line)
        # Alignment rows (---, :---:) and entirely blank rows carry no data.
        if all(set(c) <= set('-: ') for c in cells):
            continue
        rows.append(cells)

    if len(rows) < 2:
        return

    if title:
        caption = doc.add_paragraph().add_run(title)
        caption.bold = True
        caption.font.size = Pt(11)
        caption.font.name = 'Times New Roman'

    n_cols = max(len(r) for r in rows)
    table = doc.add_table(rows=len(rows), cols=n_cols)
    table.style = 'Light Shading'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    for i, row_data in enumerate(rows):
        for j, cell_text in enumerate(row_data):
            set_cell_text(table.cell(i, j), cell_text, bold=(i == 0))

    doc.add_paragraph()


def parse_md_file(filepath):
    """Split a markdown results file into table and note sections.

    The file is decoded as UTF-8 regardless of the platform locale.

    Args:
        filepath: ``pathlib.Path`` of the markdown file.

    Returns:
        A list of tuples in document order:

        * ``('table', title, md)`` -- ``md`` is the newline-joined run of
          lines containing '|' collected since the last heading; ``title``
          is the most recent heading text, or ``None`` if there was none.
        * ``('note', text)`` -- a stripped line starting with '*' that was
          seen while no table lines were pending.
    """
    text = filepath.read_text(encoding='utf-8')
    sections = []
    current_title = None
    table_buf = []

    for line in text.split('\n'):
        if line.startswith('#'):
            # A heading closes the table collected so far.
            if table_buf:
                sections.append(('table', current_title, '\n'.join(table_buf)))
                table_buf = []
            current_title = re.sub(r'^#+\s*', '', line).strip()
        elif '|' in line:
            table_buf.append(line)
        elif line.strip().startswith('*') and not table_buf:
            # Starred lines are kept only while no table rows are pending;
            # ones that follow table rows are skipped.
            sections.append(('note', line.strip()))

    if table_buf:
        sections.append(('table', current_title, '\n'.join(table_buf)))

    return sections


# Ordered LaTeX -> plain-text substitutions shared by display and inline math.
# Longer commands come before their prefixes (\cdots before \cdot,
# \hat{\beta} before \beta and \hat) because str.replace would otherwise
# consume the shorter command first and leave debris behind.
_LATEX_REPLACEMENTS = (
    ('\\hat{\\beta}', '\u03b2\u0302'),
    ('\\text{', ''),
    ('\\log', 'log'),
    ('\\exp', 'exp'),
    ('\\cdots', '...'),
    ('\\cdot', '\u00b7'),
    ('\\varepsilon', '\u03b5'),
    ('\\alpha', '\u03b1'),
    ('\\beta', '\u03b2'),
    ('\\gamma', '\u03b3'),
    ('\\delta', '\u03b4'),
    ('\\Delta', '\u0394'),
    ('\\theta', '\u03b8'),
    ('\\phi', '\u03c6'),
    ('\\sum', '\u03a3'),
    ('\\times', '\u00d7'),
    ('\\widehat', ''),
    ('\\hat', ''),
    ('\\_', '_'),
)

# Inline markup recognised in body text: ***bold italic***, **bold**,
# *italic* and $math$.
_INLINE_TOKEN = re.compile(r'(\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*|\$[^$]+\$)')

# Markdown heading depth -> (Word heading level, font size in pt, italic).
_HEADING_STYLES = {
    1: (0, 16, False),
    2: (1, 14, False),
    3: (2, 12, False),
    4: (3, 11, True),
}

# (caption, file name) of each figure, looked up under paper/figures.
_FIGURE_FILES = [
    ("Figure 1: EMU Demographic Divergence — Z₁ Trajectories to 2060",
     "fig1_z1_divergence.png"),
    ("Figure 2: EMU Regime Strain Index, 2040",
     "fig2_regime_strain_2040.png"),
    ("Figure 3: Forward Counterfactual — Would EMU Members Choose to Peg?",
     "fig3_ppeg_evolution.png"),
    ("Figure 4: The Regime-Contingent CA Effect — Z₁ Coefficient Across Subsamples",
     "fig4_ca_coefficient_comparison.png"),
]

# (caption, file name) of each appendix table, looked up under TABLES_DIR.
_TABLE_FILES = [
    ("Table 1: Summary Statistics", "summary_statistics.md"),
    ("Table 2: Demographics and Monetary Independence", "trilemma_mi.md"),
    ("Table 3: Demographics and Exchange Rate Stability", "trilemma_ers.md"),
    ("Table 4: Demographics and Financial Openness", "trilemma_fo.md"),
    ("Table 5: OECD vs Non-OECD Split", "oecd_trilemma_split.md"),
    ("Table 6: Age Decomposition", "trilemma_age_decomp.md"),
    ("Table 7: Peg vs Float Logit", "peg_vs_float_logit.md"),
    ("Table 8: Lagged Demographics", "lagged_trilemma.md"),
    ("Table 9: First Differences", "first_diff_trilemma.md"),
    ("Table 10: OECD Subsample & Excluding Financial Centers", "excl_fin_centers.md"),
    ("Table 11: Pre/Post GFC Split", "gfc_split.md"),
    ("Table 12: Logit vs LPM", "logit_vs_lpm.md"),
    ("Table 13: KAOPEN Mediation", "kaopen_mediation.md"),
    ("Table 14: CA Mediation", "ca_mediation.md"),
    ("Table 15: Joint Model", "joint_model.md"),
    ("Table 16: Within-EMU CA Regressions", "phase9_within_emu_ca.md"),
    ("Table 17: Full Panel Mediation Diagnostic", "phase8_full_panel_mediation.md"),
    ("Table 18: Subsample Mediation", "phase8_subsample_mediation.md"),
    ("Table 19: Eurozone Yield Spreads", "phase9_yield_spreads.md"),
    ("Table 20: Pre-Crisis vs Post-Crisis EMU", "phase9_pre_post_crisis.md"),
    ("Table 21: EMU Z Projections", "phase9_emu_projection_summary.md"),
    ("Table 22: Regime Strain Index", "phase9_regime_strain.md"),
    ("Table 23: Forward Counterfactual", "phase9_forward_counterfactual.md"),
    ("Table 24: Regime-Contingent CA", "phase8_regime_contingent.md"),
    ("Table 25: Eurozone vs Floaters", "eurozone_vs_floaters.md"),
    ("Table 26: EMU Counterfactual (Current)", "emu_counterfactual.md"),
    ("Table 27: EMU Dispersion", "emu_dispersion.md"),
    ("Table 28: Creditor-Debtor Asymmetry", "phase8_creditor_debtor.md"),
    ("Table A29: EMU Magnitude Robustness", "phase11_emu_robustness.md"),
    ("Table A30: Logit Robustness — Region and Income FE", "phase11_logit_robustness.md"),
    ("Table A31: Projection Sensitivity — ±1SE Bands", "phase11_projection_sensitivity.md"),
    ("Table A32: EMU Hub Exclusion and Post-2010 Robustness", "phase11_emu_hub_exclusion.md"),
    # Phase 10: Monetary Union Comparison
    ("Table A33: Within-Union CA Regressions — Cross-Union Comparison", "phase10_within_union_ca.md"),
    ("Table A34: Pooled Monetary Union Analysis", "phase10_pooled_unions.md"),
    ("Table A35: Within-Union Z Deviations → CA Deviations", "phase10_within_union_deviations.md"),
    ("Table A36: CFA Franc Zone Deep Dive — WAEMU vs CEMAC", "phase10_cfa_deep_dive.md"),
    ("Table A37: CFA Z₁ Projections to 2060", "phase10_cfa_projections.md"),
    ("Table A38: Union Members vs Non-Union", "phase10_union_vs_nonunion.md"),
    ("Table A39: Z₁ Coefficient Summary Across Unions", "phase10_z1_comparison.md"),
    ("Table A40: CFA Regime Strain Index", "phase10_cfa_regime_strain.md"),
    ("Table A41: ECCU Regime Strain Index", "phase10_eccu_regime_strain.md"),
    ("Table A42: CMA Regime Strain Index", "phase10_cma_regime_strain.md"),
    # Phase 10 Robustness
    ("Table A43: Robustness — CFA Excluding Oil Exporters", "phase10_robustness_cfa_oil.md"),
    ("Table A44: Robustness — Time Period Stability", "phase10_robustness_periods.md"),
    ("Table A45: Robustness — Additional Controls", "phase10_robustness_controls.md"),
    ("Table A46: Robustness — Excluding Outlier Countries", "phase10_robustness_outliers.md"),
    ("Table A47: Placebo — Non-Union Peggers vs Union Members vs Floaters", "phase10_robustness_placebo.md"),
    ("Table A48: Robustness — Z₁ × KAOPEN Within Unions", "phase10_robustness_kaopen.md"),
    ("Table A49: Robustness — Age Decomposition Across Unions", "phase10_robustness_age_decomp.md"),
    ("Table A50: Robustness — Winsorized CA/GDP", "phase10_robustness_winsorized.md"),
    ("Table A51: Robustness — CFA with GDP/Capita and Trade Controls", "phase10_robustness_cfa_controls.md"),
    # EMU-specific detail tables
    ("Table A52: EMU CA Detail", "phase10_emu_ca_detail.md"),
    ("Table A53: CFA CA Detail", "phase10_cfa_ca_detail.md"),
    ("Table A54: ECCU CA Detail", "phase10_eccu_ca_detail.md"),
    ("Table A55: CMA CA Detail", "phase10_cma_ca_detail.md"),
]


def _style_heading(paragraph, size, italic=False, center=False):
    """Render a heading's runs as black Times New Roman at *size* points."""
    if center:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    for run in paragraph.runs:
        run.font.size = Pt(size)
        run.font.name = 'Times New Roman'
        run.font.color.rgb = RGBColor(0, 0, 0)
        if italic:
            run.italic = True


def _add_front_matter(doc):
    """Append the centered author/affiliation/date/series lines (SSRN front matter)."""
    for text, size, bold, italic in (
        ('Brian Peters', 13, True, False),
        ('Independent Researcher', 11, False, False),
        ('March 2026', 11, False, False),
        ('Perspective on Risk Working Paper Series', 11, False, True),
    ):
        p = doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        run = p.add_run(text)
        run.font.size = Pt(size)
        run.font.name = 'Times New Roman'
        if bold:
            run.bold = True
        if italic:
            run.italic = True
    doc.add_paragraph()  # spacing


def _latex_to_plain(math):
    """Approximate a LaTeX fragment as plain Unicode text."""
    for old, new in _LATEX_REPLACEMENTS:
        math = math.replace(old, new)
    # Unwrap braced sub/superscripts while the closing braces still exist.
    math = re.sub(r'_\{([^}]+)\}', r'_\1', math)
    math = re.sub(r'\^\{([^}]+)\}', r'^\1', math)
    # Remove any remaining LaTeX commands, then leftover grouping braces.
    math = re.sub(r'\\[a-zA-Z]+', '', math)
    math = math.replace('{', '').replace('}', '')
    # Drop control characters other than newline, carriage return and tab.
    return ''.join(c for c in math if ord(c) >= 32 or c in '\n\r\t')


def _collect_display_math(lines, start):
    """Return ``(latex, next_index)`` for the ``$$`` block opening at ``lines[start]``.

    A block that opens and closes on the same line is taken from that line
    alone; otherwise lines are consumed up to and including the next one
    containing ``$$`` (or to the end of input if it is never closed).
    """
    opener = lines[start].strip()
    pieces = [opener.replace('$$', '')]
    nxt = start + 1
    if opener.count('$$') < 2:
        while nxt < len(lines) and '$$' not in lines[nxt]:
            pieces.append(lines[nxt].strip())
            nxt += 1
        if nxt < len(lines):
            pieces.append(lines[nxt].strip().replace('$$', ''))
            nxt += 1
    return ' '.join(piece for piece in pieces if piece), nxt


def _protect_stars(text):
    """Swap significance stars after a digit for placeholders.

    Only stars that follow a digit AND are followed by whitespace, comma,
    closing paren, pipe or end-of-string are protected, so they are not
    mistaken for markdown bold/italic delimiters.
    """
    text = re.sub(r'(\d)\*\*\*(?=[\s,)|]|$)', r'\1⁂⁂⁂', text)
    text = re.sub(r'(\d)\*\*(?!\*)(?=[\s,)|]|$)', r'\1⁂⁂', text)
    text = re.sub(r'(\d)\*(?!\*)(?=[\s,)|]|$)', r'\1⁂', text)
    return text


def _restore_stars(text):
    """Turn the placeholders written by ``_protect_stars`` back into stars."""
    return text.replace('⁂', '*')


def _add_inline_runs(paragraph, text):
    """Append *text* to *paragraph*, honoring bold, italic and ``$math$`` markup."""
    text = _protect_stars(text)
    # Handle -- as em dash
    text = text.replace(' -- ', ' \u2014 ')
    for part in _INLINE_TOKEN.split(text):
        if not part:
            # re.split yields empty strings around adjacent tokens.
            continue
        bold = italic = False
        font_name, size = 'Times New Roman', 12
        if part.startswith('***') and part.endswith('***'):
            content = _restore_stars(part[3:-3])
            bold = italic = True
        elif part.startswith('**') and part.endswith('**'):
            content = _restore_stars(part[2:-2])
            bold = True
        elif part.startswith('*') and part.endswith('*') and len(part) > 2:
            content = _restore_stars(part[1:-1])
            italic = True
        elif part.startswith('$') and part.endswith('$'):
            content = _latex_to_plain(_restore_stars(part[1:-1]))
            italic = True
            font_name, size = 'Cambria Math', 11
        else:
            content = _restore_stars(part)
        run = paragraph.add_run(content)
        if bold:
            run.bold = True
        if italic:
            run.italic = True
        run.font.name = font_name
        run.font.size = Pt(size)


def build_docx():
    """Render the manuscript, figures and appendix tables into one .docx.

    Reads ``PAPER_MD`` (as UTF-8) and converts it line by line: ``#`` to
    ``####`` headings, ``$$`` display math, pipe tables, ``- `` bullets,
    ``N. `` numbered items and ordinary paragraphs with inline markup. The
    first level-1 heading is followed by the author front matter. Figure
    images and table files that exist on disk are then appended, each on
    its own page; missing ones are skipped. The document is saved to
    ``OUTPUT`` and its path and size are printed.
    """
    doc = Document()

    for section in doc.sections:
        section.top_margin = Cm(2.54)
        section.bottom_margin = Cm(2.54)
        section.left_margin = Cm(2.54)
        section.right_margin = Cm(2.54)

    style = doc.styles['Normal']
    style.font.name = 'Times New Roman'
    style.font.size = Pt(12)
    style.paragraph_format.space_after = Pt(6)
    style.paragraph_format.line_spacing = 1.15

    lines = PAPER_MD.read_text(encoding='utf-8').split('\n')

    title_emitted = False
    i = 0
    while i < len(lines):
        line = lines[i]

        # Headings: one to four '#' followed by a space, at column 0.
        heading = re.match(r'(#{1,4}) ', line)
        if heading:
            depth = len(heading.group(1))
            level, size, italic = _HEADING_STYLES[depth]
            p = doc.add_heading(line[depth + 1:].strip(), level=level)
            _style_heading(p, size, italic=italic, center=(depth == 1))
            if depth == 1 and not title_emitted:
                # SSRN front matter goes under the first title only.
                _add_front_matter(doc)
                title_emitted = True
            i += 1
            continue

        if line.strip().startswith('$$'):
            latex, i = _collect_display_math(lines, i)
            p = doc.add_paragraph()
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            run = p.add_run(_latex_to_plain(latex))
            run.font.size = Pt(10)
            run.font.name = 'Cambria Math'
            run.italic = True
            continue

        if line.strip().startswith('|'):
            # Consume the whole run of consecutive lines containing a pipe.
            table_lines = []
            while i < len(lines) and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            add_md_table(doc, '\n'.join(table_lines))
            continue

        if line.strip():
            text = line.strip()

            # Detect bullet points and numbered lists
            num_match = re.match(r'^(\d+)\.\s+', text)
            if text.startswith('- '):
                p = doc.add_paragraph(style='List Bullet')
                text = text[2:]  # strip '- '
            elif num_match:
                p = doc.add_paragraph(style='List Number')
                text = text[num_match.end():]  # strip '1. '
            else:
                p = doc.add_paragraph()

            _add_inline_runs(p, text)

        i += 1

    # Figures
    doc.add_page_break()
    _style_heading(doc.add_heading('Figures', level=0), 16, center=True)

    fig_dir = PROJECT / "paper" / "figures"
    for label, filename in _FIGURE_FILES:
        filepath = fig_dir / filename
        if not filepath.exists():
            continue
        doc.add_page_break()
        _style_heading(doc.add_heading(label, level=1), 13)
        doc.add_picture(str(filepath), width=Inches(6))
        # Center the paragraph that now ends the document (the picture's).
        last_paragraph = doc.paragraphs[-1]
        last_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Appendix tables
    doc.add_page_break()
    _style_heading(doc.add_heading('Appendix: Tables', level=0), 16, center=True)

    for label, filename in _TABLE_FILES:
        filepath = TABLES_DIR / filename
        if not filepath.exists():
            continue

        doc.add_page_break()
        _style_heading(doc.add_heading(label, level=1), 13)

        for section in parse_md_file(filepath):
            if section[0] == 'table':
                _, title, md = section
                # Skip the caption when it would just repeat the heading.
                if title and title != label:
                    add_md_table(doc, md, title=title)
                else:
                    add_md_table(doc, md)
            elif section[0] == 'note':
                p = doc.add_paragraph()
                run = p.add_run(section[1])
                run.italic = True
                run.font.size = Pt(9)
                run.font.name = 'Times New Roman'

    doc.save(str(OUTPUT))
    print(f"Saved: {OUTPUT}")
    print(f"Size: {OUTPUT.stat().st_size / 1024:.0f} KB")


# Build the document only when this file is run as a script, not on import.
if __name__ == '__main__':
    build_docx()
